Results¶
This notebook renders figures from the training and evaluation results of the models.
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
# Initialize Plotly to work in Jupyter notebook
init_notebook_mode(connected=True)
# Render all figures inline in the classic notebook frontend.
pio.renderers.default = 'notebook'
Training¶
Loading Training Data¶
# Root directory containing the per-model training outputs.
data_directory = "./output/data/"
# Per-episode training metrics for each architecture:
# vanilla DQN, Double DQN (dDQN), and Dueling DQN (duelDQN).
df_dqn = pd.read_csv(data_directory+"DQN/"+"training_metrics.csv")
df_ddqn = pd.read_csv(data_directory+"dDQN/"+"training_metrics.csv")
df_dueldqn = pd.read_csv(data_directory+"duelDQN/"+"training_metrics.csv")
Performance Comparison: Learning Curves¶
Learning Curve Helper Functions¶
def plot_total_reward_and_moving_average(df, window_size=10):
    """Plot a moving-average reward curve per column of *df*, each with a
    shaded +/- 1 rolling-standard-deviation band, on one Plotly figure.

    Parameters
    ----------
    df : pd.DataFrame
        One column per model; each column holds total reward per episode.
    window_size : int
        Window used for both the moving average and the rolling std.
    """
    fig = go.Figure()
    colors = px.colors.qualitative.Plotly  # Use Plotly's qualitative color palette
    # Loop through each column in the dataframe
    for idx, column in enumerate(df.columns):
        total_reward_per_episode = df[column].values
        # Calculate the moving average ('valid' mode: len(series) - window + 1 points)
        moving_avg_rewards = moving_average(total_reward_per_episode, window_size)
        # Calculate the rolling standard deviation for the shaded area
        rolling_std = np.array([np.std(total_reward_per_episode[i:i + window_size]) for i in range(len(moving_avg_rewards))])
        lower_bound = moving_avg_rewards - rolling_std
        upper_bound = moving_avg_rewards + rolling_std
        # Padding for the moving average and bounds: edge-pad both sides so the
        # smoothed curve is re-centered over the full episode axis.
        padding = (len(total_reward_per_episode) - len(moving_avg_rewards)) // 2
        moving_avg_rewards_padded = np.pad(moving_avg_rewards, (padding, padding), 'edge')
        lower_bound_padded = np.pad(lower_bound, (padding, padding), 'edge')
        upper_bound_padded = np.pad(upper_bound, (padding, padding), 'edge')
        # Adjusting the padded arrays to match total_reward_per_episode length
        # (the integer division above can leave them one element short or long).
        if len(moving_avg_rewards_padded) < len(total_reward_per_episode):
            diff = len(total_reward_per_episode) - len(moving_avg_rewards_padded)
            moving_avg_rewards_padded = np.pad(moving_avg_rewards_padded, (0, diff), 'edge')
            lower_bound_padded = np.pad(lower_bound_padded, (0, diff), 'edge')
            upper_bound_padded = np.pad(upper_bound_padded, (0, diff), 'edge')
        else:
            moving_avg_rewards_padded = moving_avg_rewards_padded[:len(total_reward_per_episode)]
            lower_bound_padded = lower_bound_padded[:len(total_reward_per_episode)]
            upper_bound_padded = upper_bound_padded[:len(total_reward_per_episode)]
        line_color = colors[idx % len(colors)]
        fill_color = lighter_color(line_color)  # translucent variant for the band
        # Add the moving average line
        fig.add_trace(go.Scatter(
            x=np.arange(len(total_reward_per_episode)),
            y=moving_avg_rewards_padded,
            mode='lines',
            line=dict(color=line_color),
            name=f'{column} Moving Average',
            legendgroup=f'{column}',  # Group line and shaded area
            showlegend=True
        ))
        # Add the shaded area for the std deviation: x runs forward along the
        # upper bound, then backward along the lower bound to close the polygon.
        fig.add_trace(go.Scatter(
            x=np.concatenate([np.arange(len(total_reward_per_episode)), np.arange(len(total_reward_per_episode))[::-1]]),
            y=np.concatenate([upper_bound_padded, lower_bound_padded[::-1]]),
            fill='toself',
            fillcolor=fill_color,
            line=dict(color='rgba(255, 255, 255, 0)'),
            name=f'{column} Std Dev',
            legendgroup=f'{column}',  # Group with corresponding line
            showlegend=False,  # Do not show separate legend for shaded area
        ))
    fig.update_layout(
        title='Learning Curve',
        xaxis_title='Episode',
        yaxis_title='Total Reward',
        width=1000,  # Set the width of the plot
        height=800,  # Set the height of the plot
        legend=dict(
            x=0.99,  # Position at 99% along the x-axis
            y=0.01,  # Position at 1% along the y-axis
            xanchor='right',  # Anchor legend box's right corner
            yanchor='bottom'  # Anchor legend box's bottom corner
        ),
        showlegend=True
    )
    pio.show(fig)
def moving_average(values, window):
    """Simple (unweighted) moving average of *values*.

    Uses a 'valid' convolution, so the result has
    ``len(values) - window + 1`` points.
    """
    kernel = np.full(window, 1.0 / window)
    return np.convolve(values, kernel, 'valid')
def combine_columns(dfs, column_name='Total Reward'):
    """Collect one named column from each (dataframe, label) pair.

    Each extracted column is renamed to '<label>_<column_name>' so the
    originating model stays identifiable after concatenation.
    """
    renamed = []
    for frame, label in dfs:
        col = frame[[column_name]].rename(
            columns={column_name: f'{label}_{column_name}'}
        )
        renamed.append(col)
    return pd.concat(renamed, axis=1)
def lighter_color(color, intensity=0.2):
    """Turn a hex color ('#RRGGBB' or '#RGB') into a translucent rgba() string."""
    hex_part = color.lstrip('#')
    step = len(hex_part) // 3
    r, g, b = (int(hex_part[i:i + step], 16) for i in range(0, len(hex_part), step))
    return f'rgba({r}, {g}, {b}, {intensity})'
# Pair each metrics dataframe with its model label for combine_columns.
dataframes = [(df_dqn, 'DQN'), (df_ddqn, 'dDQN'), (df_dueldqn, 'duelDQN')]
# Combine the Total Reward columns
combined_df = combine_columns(dataframes, column_name='Total Reward')
Figure: Learning Curve¶
plot_total_reward_and_moving_average(combined_df, window_size=10)
Training Loss¶
Training loss curves
def plot_columns(df, title, ytitle):
    """Plot a smoothed (window-5 moving average) line for every column of *df*.

    Parameters
    ----------
    df : pd.DataFrame
        One column per model/series; the index is used as the episode axis.
    title : str
        Figure title.
    ytitle : str
        Y-axis label. Also picks the legend position: bottom-right for
        'Total Reward' (curves trend upward), higher up otherwise (loss
        curves trend downward), so the legend avoids the data.
    """
    fig = go.Figure()
    # Add a smoothed trace for each column in the dataframe.
    for column in df.columns:
        smoothed = moving_average(df[column], window=5)
        fig.add_trace(go.Scatter(
            # 'valid'-mode smoothing shortens the series; trim x to match so
            # every plotted point has an explicit coordinate.
            x=df.index[:len(smoothed)],
            y=smoothed,
            mode='lines',
            name=column,
            line=dict(width=2)
        ))
    # Only the legend's vertical position depends on the metric; everything
    # else in the layout is shared between the two cases.
    legend_y = 0.1 if ytitle == "Total Reward" else 0.7
    fig.update_layout(
        title=title,
        xaxis_title='Episode',
        yaxis_title=ytitle,
        width=800,
        height=600,
        legend=dict(
            x=0.99,          # 99% along the x-axis
            y=legend_y,      # metric-dependent height (see above)
            xanchor='right', # Anchor legend box's right corner
            yanchor='bottom' # Anchor legend box's bottom corner
        ),
        showlegend=True
    )
    # Show plot
    fig.show()
# Per-episode average training loss for each architecture.
loss_df = combine_columns(dataframes, column_name='Average Loss')
plot_columns(loss_df,'Average Loss for Different Models','Average Loss')
Comparison of Average Training Time¶
Average training time (over 50 episodes) of each architecture with identical settings.
# Mean per-episode training time for each architecture (units as recorded
# in the CSV -- presumably seconds; confirm against the training logger).
time_df = combine_columns(dataframes, column_name='Training Time')
time_df.mean()
DQN_Training Time 10.719233 dDQN_Training Time 10.697270 duelDQN_Training Time 24.309608 dtype: float64
Hyperparameters¶
Effects of hyperparameters on DQN model.
Epsilon Decay¶
# DQN runs trained with different epsilon-decay rates
# (the baseline df_dqn run uses eps=0.995).
df_eps75 = pd.read_csv(data_directory+"DQN_eps-0.75/"+"training_metrics.csv")
df_eps9 = pd.read_csv(data_directory+"DQN_eps-0.9/"+"training_metrics.csv")
df_eps999 = pd.read_csv(data_directory+"DQN_eps-0.999/"+"training_metrics.csv")
decay = [(df_eps75, 'eps=0.75'), (df_eps9, 'eps=0.9'), (df_dqn, 'eps=0.995'), (df_eps999, 'eps=0.999')]
decay_df = combine_columns(decay, column_name='Total Reward')
decay2_df = combine_columns(decay, column_name='Average Loss')
plot_columns(decay_df,"Effect of Epsilon Decay","Total Reward")
plot_columns(decay2_df,"Effect of Epsilon Decay","Average Loss")
Discount Factor¶
# DQN runs trained with different discount factors
# (the baseline df_dqn run uses gamma=0.99).
df_gamma75 = pd.read_csv(data_directory+"DQN_gamma-0.75/"+"training_metrics.csv")
df_gamma9 = pd.read_csv(data_directory+"DQN_gamma-0.9/"+"training_metrics.csv")
df_gamma5 = pd.read_csv(data_directory+"DQN_gamma-0.5/"+"training_metrics.csv")
discount = [(df_gamma5, 'gamma=0.5'), (df_gamma75, 'gamma=0.75'), (df_gamma9, 'gamma=0.9'), (df_dqn, 'gamma=0.99')]
discount_df = combine_columns(discount, column_name='Total Reward')
discount2_df = combine_columns(discount, column_name='Average Loss')
plot_columns(discount_df,"Effect of Discount Factor","Total Reward")
plot_columns(discount2_df,"Effect of Discount Factor","Average Loss")
Learning Rate¶
# DQN runs trained with different learning rates
# (the baseline df_dqn run uses lr=0.0001).
df_lr01 = pd.read_csv(data_directory+"DQN_lr-0.01/"+"training_metrics.csv")
df_lr001 = pd.read_csv(data_directory+"DQN_lr-0.001/"+"training_metrics.csv")
df_lr00001 = pd.read_csv(data_directory+"DQN_lr-0.00001/"+"training_metrics.csv")
df_lr000001 = pd.read_csv(data_directory+"DQN_lr-0.000001/"+"training_metrics.csv")
learning = [(df_lr01, 'lr=0.01'), (df_lr001, 'lr=0.001'), (df_dqn, 'lr=0.0001'), (df_lr00001, 'lr=0.00001'), (df_lr000001, 'lr=0.000001')]
lr_df = combine_columns(learning, column_name='Total Reward')
lr2_df = combine_columns(learning, column_name='Average Loss')
plot_columns(lr_df,"Effect of Learning Rate","Total Reward")
plot_columns(lr2_df,"Effect of Learning Rate","Average Loss")
Evaluation¶
import torch
# Prefer the GPU when available; evaluation also runs fine on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
cpu
Environment¶
We can load in our environment, this must match the training environment which had a state space of (100,100), 3 UAVs, and 20 users.
from core.uav_env import UAVEnv
# Must match the training environment: (100, 100) area, 3 UAVs, 20 users.
env = UAVEnv(num_users=20, num_uavs=3, area_size=(100, 100))
# Flattened observation length (rows * cols of the 2D observation space).
state_size = env.observation_space.shape[0] * env.observation_space.shape[1]
action_size = env.action_space.n
Visualize Environment¶
We can visualize the environment through the visualize_uav_positions function from ./visualization/visualize.py. This works on any state, from the initial state through N steps. Please note that it becomes laggy with a large number of time steps.
from visualization.visualize import visualize_uav_positions
# Reset to the initial state before rendering user and UAV positions.
env.reset()
visualize_uav_positions(env.user_positions, env.uav_positions)
Configuration¶
Load the config file from training. The input is the specific architecture; the options are "DQN", "DoubleDQN", and "DuelingDQN".
from configs.config import Config
# Load the training-time configuration for each architecture.
config_dqn = Config("DQN")
config_ddqn = Config("DoubleDQN")
config_dueldqn = Config("DuelingDQN")
Here's an example of the configuration parameters.
# Inspect the loaded configuration parameters.
config_dqn.to_dict()
{'network': 'DQN',
'num_episodes': 50,
'batch_size': 64,
'gamma': 0.75,
'learning_rate': 1e-05,
'epsilon_start': 1.0,
'epsilon_end': 0.1,
'epsilon_decay': 0.75,
'target_update': 10,
'checkpoint_interval': 10,
'evaluation_interval': 5,
'replay_buffer_capacity': 5000}
Load Trained Policy¶
We can load the trained models.
# Load the best-model checkpoints saved during training.
# NOTE(review): torch.load unpickles the file -- only load trusted,
# locally produced checkpoints.
dqn_model = torch.load(data_directory+"DQN"+'/best_model/best_model.pth.tar', map_location=device)
ddqn_model = torch.load(data_directory+"dDQN"+'/best_model/best_model.pth.tar', map_location=device)
dueldqn_model = torch.load(data_directory+"duelDQN"+'/best_model/best_model.pth.tar', map_location=device)
We can set up the policy nets.
from core.dqn import DQN, DuelingDQN
# DQN
policy_net_dqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dqn.load_state_dict(dqn_model['policy_net_state_dict'])
# Double DQN (same network class as DQN; the "double" part only affects training)
policy_net_ddqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_ddqn.load_state_dict(ddqn_model['policy_net_state_dict'])
# Dueling DQN
policy_net_dueldqn = DuelingDQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dueldqn.load_state_dict(dueldqn_model['policy_net_state_dict'])
<All keys matched successfully>
Playing Trained Policy¶
We can use evaluate_policy from ./evaluation/evaluate.py to play our trained policy.
from evaluation.evaluate import evaluate_policy
# Roll out each trained policy for one episode, keeping the user and UAV
# position histories for the animation and coverage analyses below.
average_reward_dqn, user_positions_dqn, uav_positions_dqn = evaluate_policy(env, policy_net_dqn, num_episodes=1, device=device)
average_reward_ddqn, user_positions_ddqn, uav_positions_ddqn = evaluate_policy(env, policy_net_ddqn, num_episodes=1, device=device)
average_reward_dueldqn, user_positions_dueldqn, uav_positions_dueldqn = evaluate_policy(env, policy_net_dueldqn, num_episodes=1, device=device)
Episode 0 UAV Performance Metrics: Total Distance Traveled: 4999.15 Average Distance Per Step: 5.00 Number of Users Covered: 20 Average Coverage Per Step: 0.02 Total Time: 1000 Total Rewards: -234668.89 Average Reward Per Step: -234.67 Average Reward over 1 episodes: -234668.8890813881 Episode 0 UAV Performance Metrics: Total Distance Traveled: 4969.21 Average Distance Per Step: 4.97 Number of Users Covered: 19 Average Coverage Per Step: 0.02 Total Time: 1000 Total Rewards: -192344.76 Average Reward Per Step: -192.34 Average Reward over 1 episodes: -192344.75784965386 Episode 0 UAV Performance Metrics: Total Distance Traveled: 3569.58 Average Distance Per Step: 3.57 Number of Users Covered: 19 Average Coverage Per Step: 0.02 Total Time: 1000 Total Rewards: -439780.51 Average Reward Per Step: -439.78 Average Reward over 1 episodes: -439780.5066119824
Animation¶
We can animate the results into a .mp4 file using save_animation from ./visualization/animate.py. ffmpeg is required for rendering the video.
from visualization.animate import save_animation
# Uncomment to render the evaluation rollouts to .mp4 (requires ffmpeg).
#save_animation(user_positions_dqn, uav_positions_dqn, filename='./output/videos/dqn.mp4')
#save_animation(user_positions_ddqn, uav_positions_ddqn, filename='./output/videos/ddqn.mp4')
#save_animation(user_positions_dueldqn, uav_positions_dueldqn, filename='./output/videos/dueldqn.mp4')
Coverage¶
Calculate the AUC of coverage vs threshold radius
Coverage Helper Functions¶
Calculating average (in time step) coverage per radius
def calculate_coverage_percentage(user_positions_list, uav_positions_list, radii):
    """Average (over time steps) percentage of users covered, per radius.

    Parameters
    ----------
    user_positions_list, uav_positions_list : sequences
        One element per time step; each element's first item (``pos[0]``) is
        the position array for that step (users: (num_users, 2),
        UAVs: (num_uavs, 2)).
    radii : array-like
        Candidate coverage radii.

    Returns
    -------
    np.ndarray of shape (len(radii),)
        For each radius, the mean over time steps of the percentage of users
        within that radius of their nearest UAV.
    """
    # Stack positions across time steps: (T, num_users, 2) and (T, num_uavs, 2).
    all_user_positions = np.stack([pos[0] for pos in user_positions_list])
    all_uav_positions = np.stack([pos[0] for pos in uav_positions_list])
    num_users = all_user_positions.shape[1]
    # Pairwise user-UAV distances per step: (T, num_users, num_uavs).
    distances = np.linalg.norm(
        all_user_positions[:, :, np.newaxis, :] - all_uav_positions[:, np.newaxis, :, :],
        axis=3
    )
    # Distance from each user to its nearest UAV: (T, num_users).
    min_distances = np.min(distances, axis=2)
    # One broadcast replaces the per-radius Python loop:
    # (T, num_users, 1) <= (num_radii,) -> (T, num_users, num_radii).
    radii = np.asarray(radii)
    covered = min_distances[:, :, np.newaxis] <= radii
    # Count covered users per step, then average over steps: (num_radii,).
    coverage_counts = covered.sum(axis=1).mean(axis=0)
    # Convert the average covered-user count into a percentage per radius.
    return (coverage_counts / num_users) * 100
# Define the range of coverage radii (100 evenly spaced values in [0, 100]).
radii = np.linspace(0, 100, num=100)
# Calculate the average coverage percentages for the DQN rollout.
coverage_percentages = calculate_coverage_percentage(user_positions_dqn, uav_positions_dqn, radii)
print("Coverage Percentages:", coverage_percentages)
Coverage Percentages: [ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 5. 5. 5. 10. 15. 15. 15. 15. 15. 20. 20. 20. 20. 20. 25. 25. 30. 35. 40. 45. 45. 45. 45. 45. 50. 50. 50. 50. 55. 55. 55. 60. 60. 60. 60. 60. 60. 65. 65. 65. 70. 70. 70. 70. 75. 75. 75. 75. 75. 80. 80. 85. 90. 90. 90. 90. 90. 90. 95. 95. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.]
Plotting coverage and calculating AUC
from sklearn.metrics import auc
def plot_coverage_auc_single(user_positions_dqn, uav_positions_dqn, radii):
    """Plot coverage percentage vs. radius for one model, AUC in the legend."""
    # Average coverage percentage (over time steps) at each candidate radius.
    percentages = calculate_coverage_percentage(user_positions_dqn, uav_positions_dqn, radii)
    # Area under the coverage curve summarizes the whole radius sweep.
    curve_auc = auc(radii, percentages)
    coverage_trace = go.Scatter(
        x=radii,
        y=percentages,
        mode='lines',
        name=f'Coverage Percentage (AUC = {curve_auc:.2f})',
        line=dict(color='royalblue', width=2)
    )
    fig = go.Figure(data=[coverage_trace])
    fig.update_layout(
        title='Coverage Percentage vs. Coverage Radius',
        xaxis_title='Coverage Radius',
        yaxis_title='Coverage Percentage',
        width=600,   # plot width
        height=600,  # plot height
        showlegend=True,
        template='plotly_white',
        legend=dict(x=0.99, y=0.1, xanchor='right', yanchor='bottom')
    )
    fig.show()
def plot_coverage_auc(user_positions_dqn, uav_positions_dqn, user_positions_ddqn, uav_positions_ddqn, user_positions_duel_dqn, uav_positions_duel_dqn, radii):
    """Overlay coverage-vs-radius curves (with AUC) for DQN, DDQN and Dueling DQN.

    Parameters
    ----------
    user_positions_* / uav_positions_* : position histories from
        evaluate_policy for the respective model.
    radii : array-like of candidate coverage radii (shared x-axis).
    """
    # One (label, positions, color) entry per model replaces three
    # copy-pasted compute-and-trace blocks.
    models = [
        ('DQN', user_positions_dqn, uav_positions_dqn, 'blue'),
        ('DDQN', user_positions_ddqn, uav_positions_ddqn, 'green'),
        ('Duel DQN', user_positions_duel_dqn, uav_positions_duel_dqn, 'red'),
    ]
    fig = go.Figure()
    for label, user_positions, uav_positions, color in models:
        # Average coverage percentage across all time steps for each radius.
        percentages = calculate_coverage_percentage(user_positions, uav_positions, radii)
        model_auc = auc(radii, percentages)
        fig.add_trace(go.Scatter(
            x=radii,
            y=percentages,
            mode='lines',
            name=f'{label} (AUC = {model_auc:.2f})',
            line=dict(color=color, width=2)
        ))
    fig.update_layout(
        title='Coverage Percentage vs. Coverage Radius for Different Models',
        xaxis_title='Coverage Radius',
        yaxis_title='Coverage Percentage',
        width=600,   # plot width
        height=600,  # plot height
        showlegend=True,
        template='plotly_white',
        legend=dict(x=0.99, y=0.1, xanchor='right', yanchor='bottom')
    )
    fig.show()
Figure: Coverage AUC¶
radii = np.linspace(0, 100, num=100)
# Coverage AUC over the full rollout for all three models.
plot_coverage_auc(user_positions_dqn, uav_positions_dqn, user_positions_ddqn, uav_positions_ddqn, user_positions_dueldqn, uav_positions_dueldqn, radii)
# Repeat using only the second half of the rollout -- presumably to exclude
# the initial transient while the UAVs move into position; confirm intent.
num_time_steps = len(user_positions_dqn)
start_index = num_time_steps // 2
plot_coverage_auc(user_positions_dqn[start_index:], uav_positions_dqn[start_index:], user_positions_ddqn[start_index:], uav_positions_ddqn[start_index:], user_positions_dueldqn[start_index:], uav_positions_dueldqn[start_index:], radii)
Best Model¶
from core.dqn import DQN, DuelingDQN
# Load the overall best checkpoints.
# NOTE(review): 'ddDQN_best' has an extra 'd' compared to the 'dDQN'
# directory used earlier -- confirm this matches the on-disk folder name.
dqn_model = torch.load(data_directory+"DQN_best"+'/best_model.pth.tar', map_location=device)
ddqn_model = torch.load(data_directory+"ddDQN_best"+'/best_model.pth.tar', map_location=device)
# Hyperparameters of the best run (rebinds the config_dqn defined earlier).
config_dqn = Config(network="DQN",
num_episodes=50,
batch_size=64,
gamma=0.75,
epsilon_start=1.0,
epsilon_end=0.1,
epsilon_decay=0.75,
target_update=10,
checkpoint_interval=10,
replay_buffer_capacity=5000,
learning_rate=0.00001,
evaluation_interval=5)
# NOTE(review): the 'dqn' best checkpoint is instantiated as DuelingDQN and
# the 'ddqn' one as DQN -- this crossover may be intentional (the best runs
# may have used those architectures) but should be verified against the
# checkpoints' actual network classes.
policy_net_dqn = DuelingDQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dqn.load_state_dict(dqn_model['policy_net_state_dict'])
policy_net_ddqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_ddqn.load_state_dict(ddqn_model['policy_net_state_dict'])
# Re-evaluate and plot coverage AUC for the two best models.
average_reward_dqn, user_positions_dqn, uav_positions_dqn = evaluate_policy(env, policy_net_dqn, num_episodes=1, device=device)
average_reward_ddqn, user_positions_ddqn, uav_positions_ddqn = evaluate_policy(env, policy_net_ddqn, num_episodes=1, device=device)
plot_coverage_auc_single(user_positions_dqn, uav_positions_dqn, radii)
plot_coverage_auc_single(user_positions_ddqn, uav_positions_ddqn, radii)
Episode 0 UAV Performance Metrics: Total Distance Traveled: 3479.63 Average Distance Per Step: 3.48 Number of Users Covered: 18 Average Coverage Per Step: 0.02 Total Time: 1000 Total Rewards: -388642.79 Average Reward Per Step: -388.64 Average Reward over 1 episodes: -388642.7925946241 Episode 0 UAV Performance Metrics: Total Distance Traveled: 5555.42 Average Distance Per Step: 5.56 Number of Users Covered: 20 Average Coverage Per Step: 0.02 Total Time: 1000 Total Rewards: -133419.70 Average Reward Per Step: -133.42 Average Reward over 1 episodes: -133419.69525320735